In [1]:
library(data.table)
In [2]:
# Burden table: read from disk, then sort rows by Region and, within each
# region, by Disease. Later cells rely on this row order when they repeat the
# per-region burden shares positionally.
GBD <- local({
  burden_tab <- read.table(
    file = "../Data/DALY_YLL_deaths_per_region_and_27_diseases_2005.txt"
  )
  burden_tab[order(burden_tab$Region, burden_tab$Disease), ]
})
In [3]:
# Disease group labels; row k is looked up later as Mgbd$x[k].
Mgbd <- read.table(file = "../Data/27_gbd_groups.txt")
# One metrics file per disease index in the replicates directory.
sms <- list.files(
  path = "/media/igna/Elements/HotelDieu/Cochrane/MappingRCTs_vs_Burden/Replicates/Metrics_over_repl/"
)
# Disease index = what is left of each file name after dropping the first 24
# characters (the length of "Metrics_over_replicates_") and the last 4
# characters (the length of ".txt").
dis <- as.numeric(substring(sms, first = 25, last = nchar(sms) - 4))
In [4]:
# Use the file of disease 1 only to discover the region labels.
k <- 1
DF <- fread(paste0(
  "/media/igna/Elements/HotelDieu/Cochrane/MappingRCTs_vs_Burden/Replicates/Metrics_over_repl/Metrics_over_replicates_",
  k, ".txt"
))
# Sorted individual regions; the two aggregate rows ("All", "Non-HI") are
# excluded because they are used as denominators, not as regions.
regs <- sort(unique(DF$Region))
regs <- regs[!(regs %in% c("All", "Non-HI"))]
In [5]:
# Empty accumulator: the per-disease loop appends one 3-row block of quantiles
# (low / med / up) to it with rbind(), and the "All" block is prepended last.
data_f <- data.frame()
In [6]:
# ---- Helpers ---------------------------------------------------------------

# Specification of the 32 alignment metrics. expand.grid() varies its first
# argument fastest, so the row order is: RCTs/Patients, then all/NHI, then
# burden metric, then target. This is also the order of the output columns.
gap_specs <- expand.grid(
  what = c("RCTs", "Patients"),
  scope = c("all", "NHI"),
  metr = c("daly", "death", "yld", "yll"),
  target = c("fill", "nogap"),
  stringsAsFactors = FALSE
)
# "fill": the research share has to reach the burden share itself;
# "nogap": it only has to reach half of the burden share.
gap_specs$divisor <- ifelse(gap_specs$target == "fill", 1, 2)
gap_specs$label <- paste(
  gap_specs$what, "vs", gap_specs$metr, gap_specs$scope, gap_specs$target,
  sep = "_"
)

# Percentage of research that would have to be added (reallocated): summed
# distance between research share and burden share / divisor, over the regions
# where burden share > divisor * research share. Returns NA when a share is
# NA/NaN (e.g. zero total RCTs), which is filtered out later with na.rm = TRUE.
shortfall <- function(research, burden, divisor = 1) {
  lacking <- burden > divisor * research
  sum(abs(research[lacking] - burden[lacking] / divisor))
}

# All metrics of `specs` for the rows `d` of one replicate, as a named list
# (one length-1 value per metric) suitable as a data.table `j` result.
alignment_gaps <- function(d, specs) {
  gaps <- lapply(seq_len(nrow(specs)), function(i) {
    shortfall(
      d[[paste0(specs$what[i], "_prop_", specs$scope[i])]],
      d[[paste0("burden_", specs$metr[i], "_prop_", specs$scope[i])]],
      specs$divisor[i]
    )
  })
  setNames(gaps, specs$label)
}

# Regional burden shares (in %) of one disease for one burden metric, relative
# to the sum over all regions (`all`) and to the sum over all regions except
# "High-income" (`NHI`). The aggregate "All" row of `gbd` is excluded.
burden_shares <- function(gbd, disease, metr) {
  rows <- gbd[gbd$Disease == disease & gbd$metr == metr & gbd$Region != "All", ]
  non_hi_total <- sum(rows$burden[rows$Region != "High-income"])
  list(
    all = 100 * rows$burden / sum(rows$burden),
    NHI = 100 * rows$burden / non_hi_total
  )
}

# ---- Per-disease loop ------------------------------------------------------

metrics_dir <- "/media/igna/Elements/HotelDieu/Cochrane/MappingRCTs_vs_Burden/Replicates/Metrics_over_repl/"
n_regs <- length(regs)
# Region label of the row holding the denominator for each scope.
total_region <- c(all = "All", NHI = "Non-HI")

# Index 0 is the all-diseases file and is handled separately.
for (k in dis[dis != 0]) {
  DF <- fread(paste0(metrics_dir, "Metrics_over_replicates_", k, ".txt"))
  disease <- as.character(Mgbd$x[k])

  is_dis <- DF$Dis == "dis"
  DFr <- DF[DF$Region %in% regs & is_dis, ]

  # Everything below is aligned positionally with rep(): it assumes one
  # complete block of length(regs) rows per replicate, with the regions in the
  # same order as in the sorted GBD table.
  if (nrow(DFr) %% n_regs != 0) {
    stop("Disease ", k, ": row count is not a multiple of the number of regions",
         call. = FALSE)
  }
  nb_sims <- nrow(DFr) / n_regs

  # Per-replicate totals (overall and Non-HI) and regional shares in %.
  for (scope in names(total_region)) {
    in_total <- is_dis & DF$Region == total_region[[scope]]
    for (what in c("RCTs", "Patients")) {
      total <- rep(DF[[what]][in_total], each = n_regs)
      set(DFr, j = paste0(what, "_", scope), value = total)
      set(DFr, j = paste0(what, "_prop_", scope),
          value = 100 * DFr[[what]] / total)
    }
  }

  # Regional burden shares, repeated once per replicate.
  for (metr in c("daly", "death", "yld", "yll")) {
    shares <- burden_shares(GBD, disease, metr)
    set(DFr, j = paste0("burden_", metr, "_prop_all"),
        value = rep(shares$all, times = nb_sims))
    set(DFr, j = paste0("burden_", metr, "_prop_NHI"),
        value = rep(shares$NHI, times = nb_sims))
  }

  # Suppressing HI values for Non-HI ratio: with both shares at 0 the
  # High-income row never satisfies burden > research in the NHI metrics.
  nhi_cols <- grep("NHI", names(DFr), value = TRUE)
  DFr[Region == "High-income", (nhi_cols) := 0]

  set(DFr, j = "sim", value = rep(seq_len(nb_sims), each = n_regs))

  # One row per replicate, one column per alignment metric.
  df <- DFr[, alignment_gaps(.SD, gap_specs), by = "sim"]

  # Rq: NAs appear when total number of RCTs or patients overall or across
  # non HI regions is 0, meaning no possible share across regions.
  # We suppress them (na.rm = TRUE).
  dui <- df[,
    lapply(.SD, quantile, probs = c(0.025, 0.5, 0.975), na.rm = TRUE),
    .SDcols = gap_specs$label
  ]
  dui <- cbind(disease, c("low", "med", "up"), dui)
  names(dui) <- c("Disease", "UI", gap_specs$label)
  data_f <- rbind(data_f, dui)
}
In [7]:
#All diseases
# Same computation as for a single disease, applied to the all-diseases file
# (index 0) and to the GBD rows with Disease == "all". The resulting block is
# put in front of the per-disease rows.

# Specification of the 32 alignment metrics. expand.grid() varies its first
# argument fastest, so the row order is: RCTs/Patients, then all/NHI, then
# burden metric, then target. This is also the order of the output columns.
gap_specs <- expand.grid(
  what = c("RCTs", "Patients"),
  scope = c("all", "NHI"),
  metr = c("daly", "death", "yld", "yll"),
  target = c("fill", "nogap"),
  stringsAsFactors = FALSE
)
# "fill": the research share has to reach the burden share itself;
# "nogap": it only has to reach half of the burden share.
gap_specs$divisor <- ifelse(gap_specs$target == "fill", 1, 2)
gap_specs$label <- paste(
  gap_specs$what, "vs", gap_specs$metr, gap_specs$scope, gap_specs$target,
  sep = "_"
)

# Percentage of research that would have to be added (reallocated): summed
# distance between research share and burden share / divisor, over the regions
# where burden share > divisor * research share. Computing the mask once
# guarantees both operands are subset identically and the burden is halved
# outside the index (the hand-written RCTs/yll/NHI "nogap" expression had the
# "/2" inside the bracket).
shortfall <- function(research, burden, divisor = 1) {
  lacking <- burden > divisor * research
  sum(abs(research[lacking] - burden[lacking] / divisor))
}

# All metrics of `specs` for the rows `d` of one replicate, as a named list
# (one length-1 value per metric) suitable as a data.table `j` result.
alignment_gaps <- function(d, specs) {
  gaps <- lapply(seq_len(nrow(specs)), function(i) {
    shortfall(
      d[[paste0(specs$what[i], "_prop_", specs$scope[i])]],
      d[[paste0("burden_", specs$metr[i], "_prop_", specs$scope[i])]],
      specs$divisor[i]
    )
  })
  setNames(gaps, specs$label)
}

k <- 0
DF <- fread(paste0(
  "/media/igna/Elements/HotelDieu/Cochrane/MappingRCTs_vs_Burden/Replicates/Metrics_over_repl/Metrics_over_replicates_",
  k, ".txt"
))
DFr <- DF[DF$Region %in% regs, ]

# Everything below is aligned positionally with rep(): it assumes one complete
# block of length(regs) rows per replicate, with the regions in the same order
# as in the sorted GBD table.
n_regs <- length(regs)
if (nrow(DFr) %% n_regs != 0) {
  stop("All diseases: row count is not a multiple of the number of regions",
       call. = FALSE)
}
nb_sims <- nrow(DFr) / n_regs

# Per-replicate totals (overall and Non-HI) and regional shares in %.
total_region <- c(all = "All", NHI = "Non-HI")
for (scope in names(total_region)) {
  in_total <- DF$Region == total_region[[scope]]
  for (what in c("RCTs", "Patients")) {
    total <- rep(DF[[what]][in_total], each = n_regs)
    set(DFr, j = paste0(what, "_", scope), value = total)
    set(DFr, j = paste0(what, "_prop_", scope),
        value = 100 * DFr[[what]] / total)
  }
}

# Regional burden shares (in %) per burden metric, repeated once per replicate.
for (metr in c("daly", "death", "yld", "yll")) {
  gbd_mt <- GBD[GBD$Disease == "all" & GBD$metr == metr & GBD$Region != "All", ]
  non_hi_total <- sum(gbd_mt$burden[gbd_mt$Region != "High-income"])
  set(DFr, j = paste0("burden_", metr, "_prop_all"),
      value = rep(100 * gbd_mt$burden / sum(gbd_mt$burden), times = nb_sims))
  set(DFr, j = paste0("burden_", metr, "_prop_NHI"),
      value = rep(100 * gbd_mt$burden / non_hi_total, times = nb_sims))
}

# Suppressing HI values for Non-HI ratio, as done for the individual diseases:
# without it the High-income row could contribute to the Non-HI sums.
nhi_cols <- grep("NHI", names(DFr), value = TRUE)
DFr[Region == "High-income", (nhi_cols) := 0]

set(DFr, j = "sim", value = rep(seq_len(nb_sims), each = n_regs))

# One row per replicate, one column per alignment metric.
df <- DFr[, alignment_gaps(.SD, gap_specs), by = "sim"]

# Rq: NAs appear when total number of RCTs or patients overall or across
# non HI regions is 0, meaning no possible share across regions.
# We suppress them (na.rm = TRUE).
dui <- df[,
  lapply(.SD, quantile, probs = c(0.025, 0.5, 0.975), na.rm = TRUE),
  .SDcols = gap_specs$label
]
dui <- cbind("All", c("low", "med", "up"), dui)
names(dui) <- c("Disease", "UI", gap_specs$label)
# "All" goes first, followed by the per-disease rows.
data_f <- rbind(dui, data_f)
In [8]:
# Save the stacked quantile table (all diseases first, then one block per
# disease) with write.table()'s default separator, header and row names.
write.table(
  x = data_f,
  file = "../Data/Alignment_ratios_within_diseases_across_all_NHI_patients_metrs_burdens.txt"
)
In [ ]: